What do we mean by learning?
The term covers a wide range of phenomena, so we'll stick with a simple definition: learning is a change in connection weights over time. In most of the examples below, that change is driven by an error signal.
In [1]:
#From the learning examples in Nengo - a Communication Channel
%pylab inline
import nengo
from nengo.processes import WhiteSignal

model = nengo.Network('Learn a Communication Channel')
with model:
    stim = nengo.Node(output=WhiteSignal(10, high=5, rms=0.5), size_out=2)
    pre = nengo.Ensemble(60, dimensions=2)
    post = nengo.Ensemble(60, dimensions=2)
    nengo.Connection(stim, pre)
    # Initialize the connection with a random function
    conn = nengo.Connection(pre, post, function=lambda x: np.random.random(2))
    inp_p = nengo.Probe(stim)
    pre_p = nengo.Probe(pre, synapse=0.01)
    post_p = nengo.Probe(post, synapse=0.01)
sim = nengo.Simulator(model)
#sim.run(10.0)
In [9]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/pre_learn.py.cfg')
In [3]:
t=sim.trange()
figure(figsize=(12, 8))
subplot(2, 1, 1)
plot(t, sim.data[inp_p].T[0], c='k', label='Input')
plot(t, sim.data[pre_p].T[0], c='b', label='Pre')
plot(t, sim.data[post_p].T[0], c='r', label='Post')
ylabel("Dimension 1")
legend(loc='best')
title('Random function computation')
subplot(2, 1, 2)
plot(t, sim.data[inp_p].T[1], c='k', label='Input')
plot(t, sim.data[pre_p].T[1], c='b', label='Pre')
plot(t, sim.data[post_p].T[1], c='r', label='Post')
ylabel("Dimension 2")
legend(loc='best');
In [2]:
#Now learn
with model:
    error = nengo.Ensemble(60, dimensions=2)
    error_p = nengo.Probe(error, synapse=0.03)
    # Error = actual - target = post - pre
    nengo.Connection(post, error)
    nengo.Connection(pre, error, transform=-1)
    # Add the learning rule to the connection
    conn.learning_rule_type = nengo.PES()
    # Connect the error into the learning rule
    learn_conn = nengo.Connection(error, conn.learning_rule)
sim = nengo.Simulator(model)
sim.run(10.0)
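For reference, the PES (Prescribed Error Sensitivity) rule used here adjusts the decoders of the learned connection in proportion to presynaptic activity and the represented error. A minimal statement of the update (Nengo additionally scales the learning rate by the number of presynaptic neurons):
$\Delta d_i = -\kappa \, E \, a_i$
where $d_i$ are the decoders of neuron $i$, $a_i$ is its activity, $\kappa$ is the learning rate, and $E$ is the error vector computed above (post $-$ pre). The minus sign means the update drives the represented error toward zero.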
In [3]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/simple_learn.py.cfg')
In [7]:
t=sim.trange()
figure(figsize=(12, 8))
subplot(3, 1, 1)
plot(t, sim.data[inp_p].T[0], c='k', label='Input')
plot(t, sim.data[pre_p].T[0], c='b', label='Pre')
plot(t, sim.data[post_p].T[0], c='r', label='Post')
ylabel("Dimension 1")
legend(loc='best')
title('Learn a communication channel')
subplot(3, 1, 2)
plot(t, sim.data[inp_p].T[1], c='k', label='Input')
plot(t, sim.data[pre_p].T[1], c='b', label='Pre')
plot(t, sim.data[post_p].T[1], c='r', label='Post')
ylabel("Dimension 2")
legend(loc='best');
subplot(3, 1, 3)
plot(sim.trange(), sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best');
title('Error')
Out[7]:
In [4]:
#Turning learning on and off to test generalization
def inhibit(t):
    return 2.0 if t > 10.0 else 0.0

with model:
    inhib = nengo.Node(inhibit)
    # Strongly inhibit the error population after t=10s to shut learning off
    inhib_conn = nengo.Connection(inhib, error.neurons, transform=[[-1]] * error.n_neurons)
sim = nengo.Simulator(model)
#sim.run(16.0)
In [5]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/control_learn.py.cfg')
In [9]:
t=sim.trange()
figure(figsize=(12, 8))
subplot(3, 1, 1)
plot(t, sim.data[inp_p].T[0], c='k', label='Input')
plot(t, sim.data[pre_p].T[0], c='b', label='Pre')
plot(t, sim.data[post_p].T[0], c='r', label='Post')
ylabel("Dimension 1")
legend(loc='best')
title('Learn a communication channel')
subplot(3, 1, 2)
plot(t, sim.data[inp_p].T[1], c='k', label='Input')
plot(t, sim.data[pre_p].T[1], c='b', label='Pre')
plot(t, sim.data[post_p].T[1], c='r', label='Post')
ylabel("Dimension 2")
legend(loc='best');
subplot(3, 1, 3)
plot(sim.trange(), sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best');
title('Error')
Out[9]:
In [6]:
#Compute a nonlinear function
#model.connections.remove(err_fcn) #uncomment to try other fcns
#del err_fcn
model.connections.remove(inhib_conn)
del inhib_conn
model.nodes.remove(inhib)
model.connections.remove(learn_conn)
del learn_conn

def nonlinear(x):
    return x[0]*x[0], x[1]*x[1]

with model:
    # The error is now post minus the target nonlinearity
    err_fcn = nengo.Connection(pre, error, function=nonlinear, transform=-1)
    conn.learning_rule_type = nengo.PES(learning_rate=1e-4)
    # Connect the error into the learning rule
    learn_conn = nengo.Connection(error, conn.learning_rule)
sim = nengo.Simulator(model)
#sim.run(26.0)
In [7]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/square_learn.py.cfg')
In [28]:
t=sim.trange()
figure(figsize=(12, 8))
subplot(3, 1, 1)
plot(t, sim.data[inp_p].T[0], c='k', label='Input')
plot(t, sim.data[pre_p].T[0], c='b', label='Pre')
plot(t, sim.data[post_p].T[0], c='r', label='Post')
ylabel("Dimension 1")
legend(loc='best')
title('Learn a nonlinear function')
subplot(3, 1, 2)
plot(t, sim.data[inp_p].T[1], c='k', label='Input')
plot(t, sim.data[pre_p].T[1], c='b', label='Pre')
plot(t, sim.data[post_p].T[1], c='r', label='Post')
ylabel("Dimension 2")
legend(loc='best');
subplot(3, 1, 3)
plot(sim.trange(), sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best');
title('Error')
Out[28]:
Classical (or Pavlovian) conditioning uses an unconditioned stimulus (US) (e.g., meat for a dog) that elicits an unconditioned response (UR) (salivating), to cause a conditioned response (CR) (salivating after learning) to be elicited by a conditioned stimulus (CS) (e.g., ringing a bell).
The best-known model of this is the Rescorla-Wagner model, which states:
$\Delta V_x = \alpha (\lambda - \sum_x V_x)$
where $V_x$ is the associative value of conditioned stimulus $x$, $\alpha$ is a learning rate that captures the salience of the stimulus, $\lambda$ is the maximum value the US supports (usually 1), and the sum runs over all conditioned stimuli present on the trial.
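As a quick illustration of the rule itself, here is a plain-numpy sketch, independent of the Nengo model below (the two-stimulus setup and learning rate are made up for illustration):
In [ ]:
import numpy as np

alpha, lam = 0.1, 1.0      # learning rate, maximum supportable value
V = np.zeros(2)            # associative values for two conditioned stimuli

for trial in range(50):
    present = np.array([1.0, 1.0])     # both CS presented on every trial
    error = lam - np.sum(V * present)  # lambda - sum of values of stimuli present
    V += alpha * error * present       # Rescorla-Wagner update

print(V)  # each converges to ~0.5: the stimuli share the value (overshadowing)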
In this model, an error population computes the difference between the conditioned response and the unconditioned response, and the PES rule uses that error to learn a mapping from the CS representation to the CR:
In [8]:
import nengo
import numpy as np

D = 3
N = D*50

def us_stim(t):
    # cycle through the three US
    t = t % 3
    if 0.9 < t < 1: return [1, 0, 0]
    if 1.9 < t < 2: return [0, 1, 0]
    if 2.9 < t < 3: return [0, 0, 1]
    return [0, 0, 0]

def cs_stim(t):
    # cycle through the three CS (presented slightly before the US)
    t = t % 3
    if 0.7 < t < 1: return [0.7, 0, 0.5]
    if 1.7 < t < 2: return [0.6, 0.7, 0.8]
    if 2.7 < t < 3: return [0, 1, 0]
    return [0, 0, 0]

model = nengo.Network(label="Classical Conditioning")
with model:
    us_stim = nengo.Node(us_stim)
    cs_stim = nengo.Node(cs_stim)
    us = nengo.Ensemble(N, D)
    cs = nengo.Ensemble(N*2, D*2)
    nengo.Connection(us_stim, us[:D])
    nengo.Connection(cs_stim, cs[:D])
    # keep a slow (delayed) copy of the CS in the second half of the ensemble
    nengo.Connection(cs[:D], cs[D:], synapse=0.2)
    ur = nengo.Ensemble(N, D)
    nengo.Connection(us, ur)
    cr = nengo.Ensemble(N, D)
    learn_conn = nengo.Connection(cs, cr, function=lambda x: [0]*D)
    learn_conn.learning_rule_type = nengo.PES(learning_rate=3e-4)
    error = nengo.Ensemble(N, D)
    nengo.Connection(error, learn_conn.learning_rule)
    # Error = CR - UR
    nengo.Connection(ur, error, transform=-1)
    nengo.Connection(cr, error, transform=1, synapse=0.1)
    stop_learn = nengo.Node([0])
    # set this node to a positive value to inhibit the error and stop learning
    nengo.Connection(stop_learn, error.neurons, transform=-10*np.ones((N, 1)))
In [9]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/learning2-conditioning.py.cfg')
In this model, a fast learned connection (pre to post) is trained to reproduce the output of a slower pathway routed through a context-gated working memory (wm), so the fast route gradually "consolidates" what the slow route computes:
In [16]:
import nengo
import numpy as np

tau_slow = 0.2
model = nengo.Network("Cortical Consolidation")
with model:
    pre_value = nengo.Node(lambda t: np.sin(t))
    pre = nengo.Ensemble(100, 1)
    post = nengo.Ensemble(100, 1)
    target = nengo.Ensemble(100, 1)
    nengo.Connection(pre_value, pre)
    conn = nengo.Connection(pre, post, function=lambda x: np.random.random(),
                            learning_rule_type=nengo.PES())
    wm = nengo.Ensemble(300, 2, radius=1.4)
    context = nengo.Node(1)
    nengo.Connection(context, wm[1])
    nengo.Connection(pre, wm[0], synapse=tau_slow)
    nengo.Connection(wm, target, synapse=tau_slow,
                     function=lambda x: x[0]*x[1])
    error = nengo.Ensemble(n_neurons=100, dimensions=1)
    nengo.Connection(post, error, synapse=tau_slow*2, transform=1)  #Delay the fast connection so they line up
    nengo.Connection(target, error, transform=-1)
    nengo.Connection(error, conn.learning_rule)
    stop_learn = nengo.Node([0])
    nengo.Connection(stop_learn, error.neurons, transform=-10*np.ones((100, 1)))
    both = nengo.Node(None, size_in=2)  #For plotting
    nengo.Connection(post, both[0], synapse=None)
    nengo.Connection(target, both[1], synapse=None)
In [17]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/learning3-consolidation.py.cfg')
$\Delta Q(s,a) = \alpha (R + \gamma Q_{predicted} - Q_{old})$, where $R$ is the reward, $\alpha$ is a learning rate, and $\gamma$ is a discount factor.
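For instance, with made-up numbers $\alpha = 0.5$, $\gamma = 0.9$, $R = 1$, $Q_{predicted} = 2$, and $Q_{old} = 1.5$:
In [ ]:
alpha, gamma = 0.5, 0.9
R, Q_pred, Q_old = 1.0, 2.0, 1.5
dQ = alpha * (R + gamma * Q_pred - Q_old)
print(dQ)  # 0.5 * (1.0 + 1.8 - 1.5) = 0.65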
In the model below, three learned connections out of the radar ensemble estimate the utilities of going forward, turning left, and turning right; the reward is simply the agent's forward speed, with a penalty for hitting a wall:
In [ ]:
import grid

mymap = """
#########
#       #
#       #
#   ##  #
#   ##  #
#       #
#########
"""

class Cell(grid.Cell):
    def color(self):
        return 'black' if self.wall else None
    def load(self, char):
        if char == '#':
            self.wall = True

world = grid.World(Cell, map=mymap, directions=4)
body = grid.ContinuousAgent()
world.add(body, x=1, y=3, dir=2)

import nengo
import numpy as np

def move(t, x):
    speed, rotation = x
    dt = 0.001
    max_speed = 20.0
    max_rotate = 10.0
    body.turn(rotation * dt * max_rotate)
    success = body.go_forward(speed * dt * max_speed)
    if not success:  #Hit a wall
        return -1
    else:
        return speed

model = nengo.Network("Simple RL", seed=2)
with model:
    env = grid.GridNode(world, dt=0.005)

    #set up node to project movement commands to; its output is the reward
    movement_node = nengo.Node(move, size_in=2, label='reward')
    movement = nengo.Ensemble(n_neurons=100, dimensions=2, radius=1.4)
    nengo.Connection(movement, movement_node)

    def detect(t):
        #put 5 sensors between -45 and 45 degrees from the facing direction
        angles = (np.linspace(-0.5, 0.5, 5) + body.dir) % world.directions
        return [body.detect(d, max_distance=4)[0] for d in angles]
    stim_radar = nengo.Node(detect)

    #set up low fidelity sensors; noise might help exploration
    radar = nengo.Ensemble(n_neurons=50, dimensions=5, radius=4)
    nengo.Connection(stim_radar, radar)

    #set up BG to allow 3 actions (fwd/left/right)
    bg = nengo.networks.actionselection.BasalGanglia(3)
    thal = nengo.networks.actionselection.Thalamus(3)
    nengo.Connection(bg.output, thal.input)

    #start with a roughly random selection process, but prefer going fwd
    def u_fwd(x):
        return 0.8
    def u_left(x):
        return 0.6
    def u_right(x):
        return 0.7
    conn_fwd = nengo.Connection(radar, bg.input[0], function=u_fwd, learning_rule_type=nengo.PES())
    conn_left = nengo.Connection(radar, bg.input[1], function=u_left, learning_rule_type=nengo.PES())
    conn_right = nengo.Connection(radar, bg.input[2], function=u_right, learning_rule_type=nengo.PES())

    nengo.Connection(thal.output[0], movement, transform=[[1], [0]])
    nengo.Connection(thal.output[1], movement, transform=[[0], [1]])
    nengo.Connection(thal.output[2], movement, transform=[[0], [-1]])

    #Error = predicted utility - reward
    errors = nengo.networks.EnsembleArray(n_neurons=50, n_ensembles=3)
    nengo.Connection(movement_node, errors.input, transform=-np.ones((3, 1)))
    nengo.Connection(bg.input, errors.input, transform=1)

    #inhibit learning for actions not currently chosen
    #(the BG output is strongly negative for non-selected actions)
    nengo.Connection(bg.output[0], errors.ensembles[0].neurons, transform=np.ones((50, 1))*4)
    nengo.Connection(bg.output[1], errors.ensembles[1].neurons, transform=np.ones((50, 1))*4)
    nengo.Connection(bg.output[2], errors.ensembles[2].neurons, transform=np.ones((50, 1))*4)

    nengo.Connection(errors.ensembles[0], conn_fwd.learning_rule)
    nengo.Connection(errors.ensembles[1], conn_left.learning_rule)
    nengo.Connection(errors.ensembles[2], conn_right.learning_rule)
In [ ]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'configs/learning5-utility.py.cfg')
The value of a state is the discounted sum of expected future rewards:
$Q(s,t) = R(s,t) + 0.9\, R(s+1, t+1) + 0.9^2 R(s+2, t+2) + \ldots$
The same holds one step later:
$Q(s+1,t+1) = R(s+1,t+1) + 0.9\, R(s+2, t+2) + 0.9^2 R(s+3, t+3) + \ldots$
Multiplying that expression by the discount factor gives exactly the tail of the first series:
$0.9\, Q(s+1,t+1) = 0.9\, R(s+1,t+1) + 0.9^2 R(s+2, t+2) + 0.9^3 R(s+3, t+3) + \ldots$
so the value obeys the recursive relation
$Q(s,t) = R(s,t) + 0.9\, Q(s+1, t+1)$
Rearranging gives a prediction error that can be computed from only the previous state, the previous reward, and the current value estimate:
$Error(t) = Q(s-1) - (R(s-1) + 0.9\, Q(s))$
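To see this error at work outside of neurons, here is a tabular TD(0) sketch on a five-state chain (the states, rewards, and learning rate are made up for illustration):
In [ ]:
import numpy as np

gamma, alpha = 0.9, 0.1
Q = np.zeros(5)                      # value estimates for 5 states
rewards = [0.0, 0.0, 0.0, 1.0, 0.0]  # reward for leaving each state; leaving state 3 pays 1

for _ in range(200):
    for s_prev in range(4):
        s = s_prev + 1
        error = Q[s_prev] - (rewards[s_prev] + gamma * Q[s])
        Q[s_prev] -= alpha * error   # move Q toward reward + discounted future value

print(Q.round(2))  # values ramp up toward the rewarded transition: [0.73 0.81 0.9 1. 0.]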
In this model, the three connections into the learning rule assemble exactly that error: the value delayed by $\tau$, minus the delayed reward, minus 0.9 times the current value:
In [1]:
import grid

mymap = """
#######
#     #
# # # #
# # # #
#G   R#
#######
"""

class Cell(grid.Cell):
    def color(self):
        if self.wall:
            return 'black'
        elif self.reward > 0:
            return 'green'
        elif self.reward < 0:
            return 'red'
        return None
    def load(self, char):
        self.reward = 0
        if char == '#':
            self.wall = True
        if char == 'G':
            self.reward = 10
        elif char == 'R':
            self.reward = -10

world = grid.World(Cell, map=mymap, directions=4)
body = grid.ContinuousAgent()
world.add(body, x=1, y=2, dir=2)

import nengo
import numpy as np

tau = 0.1

def move(t, x):
    speed, rotation = x
    dt = 0.001
    max_speed = 20.0
    max_rotate = 10.0
    body.turn(rotation * dt * max_rotate)
    body.go_forward(speed * dt * max_speed)
    # open and close walls so the agent keeps cycling through the map
    if int(body.x) == 1:
        world.grid[4][4].wall = True
        world.grid[4][2].wall = False
    if int(body.x) == 4:
        world.grid[4][2].wall = True
        world.grid[4][4].wall = False

model = nengo.Network("Predict Value", seed=2)
with model:
    env = grid.GridNode(world, dt=0.005)
    movement = nengo.Node(move, size_in=2)

    def detect(t):
        angles = (np.linspace(-0.5, 0.5, 3) + body.dir) % world.directions
        return [body.detect(d, max_distance=4)[0] for d in angles]
    stim_radar = nengo.Node(detect)

    radar = nengo.Ensemble(n_neurons=50, dimensions=3, radius=4, seed=2,
                           noise=nengo.processes.WhiteSignal(10, 0.1, rms=1))
    nengo.Connection(stim_radar, radar)

    def braiten(x):
        # simple Braitenberg-style obstacle avoidance
        turn = x[2] - x[0]
        spd = x[1] - 0.5
        return spd, turn
    nengo.Connection(radar, movement, function=braiten)

    def position_func(t):
        # x, y position and heading, scaled to roughly [-1, 1]
        return body.x / world.width * 2 - 1, 1 - body.y/world.height * 2, body.dir / world.directions
    position = nengo.Node(position_func)
    state = nengo.Ensemble(100, 3)
    nengo.Connection(position, state, synapse=None)

    reward = nengo.Node(lambda t: body.cell.reward)

    value = nengo.Ensemble(n_neurons=50, dimensions=1)
    learn_conn = nengo.Connection(state, value, function=lambda x: 0,
                                  learning_rule_type=nengo.PES(learning_rate=1e-4,
                                                               pre_tau=tau))
    # Error = Q(delayed) - (R(delayed) + 0.9 Q(now))
    nengo.Connection(reward, learn_conn.learning_rule,
                     transform=-1, synapse=tau)
    nengo.Connection(value, learn_conn.learning_rule,
                     transform=-0.9, synapse=0.01)
    nengo.Connection(value, learn_conn.learning_rule,
                     transform=1, synapse=tau)
In [2]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model, 'configs/learning6-value.py.cfg')
In this example we again use the PES rule to learn an unknown function: the compensation needed to control a pendulum.
The PES rule takes the PID controller's output and treats it (negated) as the error signal, so whatever correction the feedback controller is still having to supply, the learned connection gradually takes over.
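Schematically, with $u$ the total torque applied to the pendulum:
$u = u_{PID} + u_{learned}, \qquad E = -u_{PID}$
so the learned decoders stop changing only once the feedback controller's contribution has been driven to zero, at which point the learned connection supplies the needed torque on its own.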
In [7]:
import pendulum as pd
import nengo
import numpy as np

model = nengo.Network(seed=3)
with model:
    env = pd.PendulumNode(seed=1, mass=4, max_torque=100)
    desired = nengo.Node(lambda t: np.sin(t*np.pi))
    nengo.Connection(desired, env[1], synapse=None)

    pid = pd.PIDNode(dimensions=1, Kp=1, Kd=0.2, Ki=0)
    nengo.Connection(pid, env[0], synapse=None)
    nengo.Connection(desired, pid[0], synapse=None, transform=1)
    nengo.Connection(env[0], pid[1], synapse=0, transform=1)
    nengo.Connection(env[3], pid[3], synapse=0, transform=1)
    # approximate the derivative of the desired signal:
    # (x(t) - x(t-dt)) / dt, with dt = 0.001
    nengo.Connection(desired, pid[2], synapse=None, transform=1000)
    nengo.Connection(desired, pid[2], synapse=0, transform=-1000)

    state = nengo.Ensemble(n_neurons=1000, dimensions=1,
                           radius=1.5,
                           #neuron_type=nengo.LIFRate(),
                           )
    nengo.Connection(env[0], state, synapse=None)
    c = nengo.Connection(state, env[0], synapse=0,
                         function=lambda x: 0,
                         learning_rule_type=nengo.PES(learning_rate=1e-5))

    stop_learning = nengo.Node(0)
    # pass the (negated) PID output through as the error unless learning is stopped
    error = nengo.Node(lambda t, x: x[0] if x[1] < 0.5 else 0, size_in=2)
    nengo.Connection(pid, error[0], synapse=None, transform=-1)
    nengo.Connection(stop_learning, error[1], synapse=None)
    nengo.Connection(error, c.learning_rule, synapse=None)
In [8]:
from nengo_gui.ipython import IPythonViz
IPythonViz(model, 'configs/pendulum.py.cfg')
In [31]:
%pylab inline
import nengo

model = nengo.Network()
with model:
    sin = nengo.Node(lambda t: np.sin(t*4))
    pre = nengo.Ensemble(100, dimensions=1)
    post = nengo.Ensemble(100, dimensions=1)
    nengo.Connection(sin, pre)
    conn = nengo.Connection(pre, post, solver=nengo.solvers.LstsqL2(weights=True))
    pre_p = nengo.Probe(pre, synapse=0.01)
    post_p = nengo.Probe(post, synapse=0.01)
sim = nengo.Simulator(model)
sim.run(2.0)
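Note that passing solver=nengo.solvers.LstsqL2(weights=True) makes Nengo solve for a full neuron-to-neuron weight matrix rather than a factored decoder/encoder pair; the BCM rule below modifies individual synaptic weights, so it needs the connection in this form.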
In [32]:
plot(sim.trange(), sim.data[pre_p], label="Pre")
plot(sim.trange(), sim.data[post_p], label="Post")
ylabel("Decoded value")
legend(loc="best");
In [33]:
conn.learning_rule_type = nengo.BCM(learning_rate=5e-10)
with model:
    trans_p = nengo.Probe(conn, 'weights', synapse=0.01, sample_every=0.01)
sim = nengo.Simulator(model)
sim.run(20.0)
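For reference, the BCM rule is unsupervised: each weight changes based only on pre- and postsynaptic activity, with a sliding modification threshold $\theta_j$ (a running average of the postsynaptic activity) deciding whether a synapse is strengthened or weakened:
$\Delta \omega_{ij} = \kappa\, a_i\, a_j (a_j - \theta_j)$
Active synapses are potentiated when the postsynaptic neuron fires above its recent average and depressed when it fires below it, which tends to sharpen each neuron's selectivity.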
In [39]:
figure(figsize=(12, 8))
subplot(2, 1, 1)
plot(sim.trange(), sim.data[pre_p], label="Pre")
plot(sim.trange(), sim.data[post_p], label="Post")
ylabel("Decoded value")
ylim(-1.6, 1.6)
legend(loc="lower left")
subplot(2, 1, 2)
# Find weight row with max variance
neuron = np.argmax(np.mean(np.var(sim.data[trans_p], axis=0), axis=1))
plot(sim.trange(dt=0.01), sim.data[trans_p][..., neuron])
ylabel("Connection weight");
In [48]:
def sparsity_measure(vector):
    # Max sparsity = 1 (single 1 in the vector)
    v = np.sort(np.abs(vector))
    n = v.shape[0]
    k = np.arange(n) + 1
    l1norm = np.sum(v)
    summation = np.sum((v / l1norm) * ((n - k + 0.5) / n))
    return 1 - 2 * summation

print("Starting sparsity: {0}".format(sparsity_measure(sim.data[trans_p][0])))
print("Ending sparsity: {0}".format(sparsity_measure(sim.data[trans_p][-1])))
Finally, the supervised PES and unsupervised BCM terms can be blended into a single rule (the hPES rule):
$\Delta \omega_{ij} = \kappa\, a_i \left( \alpha_j S\, e_j \cdot E + (1-S)\, a_j (a_j-\theta) \right)$
where $\kappa$ is the learning rate, $a_i$ and $a_j$ are the pre- and postsynaptic activities, $\alpha_j$ and $e_j$ are the gain and encoder of the postsynaptic neuron, $E$ is the error vector, $\theta$ is the BCM modification threshold, and $S \in [0, 1]$ trades off the supervised (error-driven) and unsupervised (BCM) components.
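A plain-numpy sketch of one update step under this rule (the array shapes and parameter values are illustrative, not Nengo's internals):
In [ ]:
import numpy as np

n_pre, n_post, dims = 4, 3, 2
rng = np.random.RandomState(0)

kappa, S = 1e-3, 0.8            # learning rate, supervision ratio
a_pre = rng.rand(n_pre)         # presynaptic activities a_i
a_post = rng.rand(n_post)       # postsynaptic activities a_j
alpha = np.ones(n_post)         # postsynaptic gains alpha_j
e = rng.randn(n_post, dims)     # encoders e_j
E = np.array([0.1, -0.2])       # error vector
theta = 0.5 * np.ones(n_post)   # BCM modification thresholds

supervised = alpha * S * (e @ E)                    # alpha_j S e_j . E
unsupervised = (1 - S) * a_post * (a_post - theta)  # BCM term
# weight update, one row per postsynaptic neuron
delta_w = kappa * np.outer(supervised + unsupervised, a_pre)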